# install.packages("")
library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 1.4.0 ✔ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(patchwork) # for multiple plots
# https://www12.statcan.gc.ca/census-recensement/2021/as-sa/fogs-spg/page.cfm?topic=2&lang=E&dguid=2021A00055917034#fogsTitle5
# Download Options (upper right arrow): csv https://www12.statcan.gc.ca/census-recensement/2021/as-sa/fogs-spg/alternative.cfm?topic=2&lang=E&dguid=2021A00055917034&objectId=5
# https://www12.statcan.gc.ca/census-recensement/2021/as-sa/fogs-spg/page.cfm?topic=2&lang=E&dguid=2021A000259#fogsTitle5
## Download data for BC
# https://www12.statcan.gc.ca/census-recensement/2021/as-sa/fogs-spg/page.cfm?topic=2&lang=E&dguid=2021A000259#fogsTitle6
# Read the three census CSV exports. Each read skips the first 2 rows
# (skip = 2) and takes the column names from the next row (header = TRUE).
Victoria_pop_age_gender_2021 <- read.csv(
  "Pop-Age-Gender-Victoria-2021-Census-2021A00055917034_5.csv",
  skip = 2, header = TRUE
)
BC_pop_2021 <- read.csv(
  "BC-Pop-2021A000259_6.csv",
  skip = 2, header = TRUE
)
Canada_senior_pop_2021 <- read.csv(
  "Canada-Senior-pop-percentage-2021A000259_5.csv",
  skip = 2, header = TRUE
)

# Skip the last 12 rows of each table.
# NOTE(review): presumably trailing footnote lines in the export - confirm
# against the CSV files.
Victoria_pop_age_gender_2021 <- head(Victoria_pop_age_gender_2021, -12)
BC_pop_2021 <- head(BC_pop_2021, -12)
Canada_senior_pop_2021 <- head(Canada_senior_pop_2021, -12)
class(Victoria_pop_age_gender_2021)## [1] "data.frame"
str(Victoria_pop_age_gender_2021)## 'data.frame': 42 obs. of 3 variables:
## $ Age.groups : chr "0 to 4" "0 to 4" "5 to 9" "5 to 9" ...
## $ Gender : chr "Women+" "Men+" "Women+" "Men+" ...
## $ Population.counts: int 1400 1510 1440 1395 1365 1340 1340 1375 3475 2565 ...
colnames(Victoria_pop_age_gender_2021)## [1] "Age.groups" "Gender" "Population.counts"
print(Victoria_pop_age_gender_2021)## Age.groups Gender Population.counts
## 1 0 to 4 Women+ 1400
## 2 0 to 4 Men+ 1510
## 3 5 to 9 Women+ 1440
## 4 5 to 9 Men+ 1395
## 5 10 to 14 Women+ 1365
## 6 10 to 14 Men+ 1340
## 7 15 to 19 Women+ 1340
## 8 15 to 19 Men+ 1375
## 9 20 to 24 Women+ 3475
## 10 20 to 24 Men+ 2565
## 11 25 to 29 Women+ 4525
## 12 25 to 29 Men+ 4235
## 13 30 to 34 Women+ 4490
## 14 30 to 34 Men+ 4545
## 15 35 to 39 Women+ 3635
## 16 35 to 39 Men+ 3820
## 17 40 to 44 Women+ 2945
## 18 40 to 44 Men+ 3035
## 19 45 to 49 Women+ 2570
## 20 45 to 49 Men+ 2635
## 21 50 to 54 Women+ 2705
## 22 50 to 54 Men+ 2640
## 23 55 to 59 Women+ 2895
## 24 55 to 59 Men+ 2665
## 25 60 to 64 Women+ 3205
## 26 60 to 64 Men+ 2755
## 27 65 to 69 Women+ 3345
## 28 65 to 69 Men+ 2675
## 29 70 to 74 Women+ 3230
## 30 70 to 74 Men+ 2675
## 31 75 to 79 Women+ 2015
## 32 75 to 79 Men+ 1685
## 33 80 to 84 Women+ 1430
## 34 80 to 84 Men+ 985
## 35 85 to 89 Women+ 1065
## 36 85 to 89 Men+ 605
## 37 90 to 94 Women+ 890
## 38 90 to 94 Men+ 350
## 39 95 to 99 Women+ 275
## 40 95 to 99 Men+ 80
## 41 100+ Women+ 40
## 42 100+ Men+ 10
glimpse(Victoria_pop_age_gender_2021)## Rows: 42
## Columns: 3
## $ Age.groups <chr> "0 to 4", "0 to 4", "5 to 9", "5 to 9", "10 to 14", …
## $ Gender <chr> "Women+", "Men+", "Women+", "Men+", "Women+", "Men+"…
## $ Population.counts <int> 1400, 1510, 1440, 1395, 1365, 1340, 1340, 1375, 3475…
class(BC_pop_2021)## [1] "data.frame"
str(BC_pop_2021)## 'data.frame': 42 obs. of 3 variables:
## $ Age.groups : chr "0 to 4" "0 to 4" "5 to 9" "5 to 9" ...
## $ Gender : chr "Women+" "Men+" "Women+" "Men+" ...
## $ Population.counts: int 105805 111015 118485 125810 123365 132425 122445 131245 143145 151505 ...
colnames(BC_pop_2021)## [1] "Age.groups" "Gender" "Population.counts"
print(BC_pop_2021)## Age.groups Gender Population.counts
## 1 0 to 4 Women+ 105805
## 2 0 to 4 Men+ 111015
## 3 5 to 9 Women+ 118485
## 4 5 to 9 Men+ 125810
## 5 10 to 14 Women+ 123365
## 6 10 to 14 Men+ 132425
## 7 15 to 19 Women+ 122445
## 8 15 to 19 Men+ 131245
## 9 20 to 24 Women+ 143145
## 10 20 to 24 Men+ 151505
## 11 25 to 29 Women+ 166585
## 12 25 to 29 Men+ 170500
## 13 30 to 34 Women+ 179660
## 14 30 to 34 Men+ 178925
## 15 35 to 39 Women+ 178175
## 16 35 to 39 Men+ 175175
## 17 40 to 44 Women+ 163260
## 18 40 to 44 Men+ 156480
## 19 45 to 49 Women+ 160515
## 20 45 to 49 Men+ 149520
## 21 50 to 54 Women+ 170460
## 22 50 to 54 Men+ 158540
## 23 55 to 59 Women+ 183825
## 24 55 to 59 Men+ 172520
## 25 60 to 64 Women+ 184920
## 26 60 to 64 Men+ 170210
## 27 65 to 69 Women+ 166700
## 28 65 to 69 Men+ 152705
## 29 70 to 74 Women+ 141995
## 30 70 to 74 Men+ 130370
## 31 75 to 79 Women+ 95015
## 32 75 to 79 Men+ 86060
## 33 80 to 84 Women+ 65780
## 34 80 to 84 Men+ 55285
## 35 85 to 89 Women+ 42005
## 36 85 to 89 Men+ 32315
## 37 90 to 94 Women+ 23120
## 38 90 to 94 Men+ 13720
## 39 95 to 99 Women+ 6925
## 40 95 to 99 Men+ 2865
## 41 100+ Women+ 1185
## 42 100+ Men+ 330
glimpse(BC_pop_2021)## Rows: 42
## Columns: 3
## $ Age.groups <chr> "0 to 4", "0 to 4", "5 to 9", "5 to 9", "10 to 14", …
## $ Gender <chr> "Women+", "Men+", "Women+", "Men+", "Women+", "Men+"…
## $ Population.counts <int> 105805, 111015, 118485, 125810, 123365, 132425, 1224…
glimpse(Canada_senior_pop_2021)## Rows: 26
## Columns: 3
## $ Geography <chr> "Newfoundland and Labrador", "Newfoun…
## $ Years <chr> "2016", "2021", "2016", "2021", "2016…
## $ Proportion.of.the.population.... <dbl> 19.4, 23.6, 19.4, 21.2, 19.9, 22.2, 1…
# Give the tables short, consistent column names first so the gender column
# can be addressed by name below.
colnames(Victoria_pop_age_gender_2021) <- c("age_group", "gender", "pop")
colnames(BC_pop_2021) <- c("age_group", "gender", "pop")
colnames(Canada_senior_pop_2021) <- c("Province", "Year", "PercentageOfPop")

# Replace the census labels "Women+" / "Men+" with "Women" / "Men".
Victoria_pop_age_gender_2021$gender[
  Victoria_pop_age_gender_2021$gender == "Women+"] <- "Women"
Victoria_pop_age_gender_2021$gender[
  Victoria_pop_age_gender_2021$gender == "Men+"] <- "Men"
BC_pop_2021$gender[BC_pop_2021$gender == "Women+"] <- "Women"
BC_pop_2021$gender[BC_pop_2021$gender == "Men+"] <- "Men"

# Turn age_group into a factor whose levels are sorted numerically
# (str_sort(numeric = TRUE)), so "100+" is ordered after "95 to 99" instead
# of after "10 to 14".
Victoria_pop_age_gender_2021$age_group <- factor(
  Victoria_pop_age_gender_2021$age_group,
  levels = str_sort(
    unique(Victoria_pop_age_gender_2021$age_group),
    numeric = TRUE
  )
)
str(Victoria_pop_age_gender_2021)## 'data.frame': 42 obs. of 3 variables:
## $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ gender : chr "Women" "Men" "Women" "Men" ...
## $ pop : int 1400 1510 1440 1395 1365 1340 1340 1375 3475 2565 ...
# Convert the BC age groups to a factor with numerically sorted levels.
BC_pop_2021 <- BC_pop_2021 %>%
  mutate(
    age_group = factor(
      age_group,
      levels = str_sort(unique(age_group), numeric = TRUE)
    )
  )
str(BC_pop_2021)## 'data.frame': 42 obs. of 3 variables:
## $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ gender : chr "Women" "Men" "Women" "Men" ...
## $ pop : int 105805 111015 118485 125810 123365 132425 122445 131245 143145 151505 ...
# Age groups kept in the "seniors" subsets (55 to 59 and older).
senior_age_groups <- c(
  "55 to 59", "60 to 64", "65 to 69", "70 to 74", "75 to 79",
  "80 to 84", "85 to 89", "90 to 94", "95 to 99", "100+"
)

Victoria_pop_age_gender_2021_seniors <- Victoria_pop_age_gender_2021[
  Victoria_pop_age_gender_2021$age_group %in% senior_age_groups, ]
BC_pop_2021_seniors <- BC_pop_2021[
  BC_pop_2021$age_group %in% senior_age_groups, ]

# pop_comma: population formatted with thousands separators, to use for
# labels. It is kept as a separate column because formatting pop itself
# changes the values of the y axis.
BC_pop_2021$pop_comma <- formatC(
  BC_pop_2021$pop, format = "d", big.mark = ","
)
BC_pop_2021_seniors$pop_comma <- formatC(
  BC_pop_2021_seniors$pop, format = "d", big.mark = ","
)
Victoria_pop_age_gender_2021$pop_comma <- formatC(
  Victoria_pop_age_gender_2021$pop, format = "d", big.mark = ","
)
Victoria_pop_age_gender_2021_seniors$pop_comma <- formatC(
  Victoria_pop_age_gender_2021_seniors$pop, format = "d", big.mark = ","
)

# Wide layout: one row per age group with a Women and a Men column.
# id_cols = age_group keeps pop_comma out of the row identifier; otherwise
# every Women/Men value lands on its own row padded with NA (42 rows instead
# of 21). Recorded console output further down predates this fix.
Victoria_pop_age_gender_2021_wide <- Victoria_pop_age_gender_2021 %>%
  pivot_wider(id_cols = age_group, names_from = gender, values_from = pop)
# Wide layout: one row per province with a column per census year.
Canada_senior_pop_2021_wide <- Canada_senior_pop_2021 %>%
  pivot_wider(names_from = Year, values_from = PercentageOfPop)
class(Victoria_pop_age_gender_2021_wide)## [1] "tbl_df" "tbl" "data.frame"
str(Victoria_pop_age_gender_2021_wide)## tibble [42 × 4] (S3: tbl_df/tbl/data.frame)
## $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ pop_comma: chr [1:42] "1,400" "1,510" "1,440" "1,395" ...
## $ Women : int [1:42] 1400 NA 1440 NA 1365 NA 1340 NA 3475 NA ...
## $ Men : int [1:42] NA 1510 NA 1395 NA 1340 NA 1375 NA 2565 ...
colnames(Victoria_pop_age_gender_2021_wide)## [1] "age_group" "pop_comma" "Women" "Men"
print(Victoria_pop_age_gender_2021_wide)## # A tibble: 42 × 4
## age_group pop_comma Women Men
## <fct> <chr> <int> <int>
## 1 0 to 4 1,400 1400 NA
## 2 0 to 4 1,510 NA 1510
## 3 5 to 9 1,440 1440 NA
## 4 5 to 9 1,395 NA 1395
## 5 10 to 14 1,365 1365 NA
## 6 10 to 14 1,340 NA 1340
## 7 15 to 19 1,340 1340 NA
## 8 15 to 19 1,375 NA 1375
## 9 20 to 24 3,475 3475 NA
## 10 20 to 24 2,565 NA 2565
## # … with 32 more rows
glimpse(Victoria_pop_age_gender_2021_wide)## Rows: 42
## Columns: 4
## $ age_group <fct> 0 to 4, 0 to 4, 5 to 9, 5 to 9, 10 to 14, 10 to 14, 15 to 19…
## $ pop_comma <chr> "1,400", "1,510", "1,440", "1,395", "1,365", "1,340", "1,340…
## $ Women <int> 1400, NA, 1440, NA, 1365, NA, 1340, NA, 3475, NA, 4525, NA, …
## $ Men <int> NA, 1510, NA, 1395, NA, 1340, NA, 1375, NA, 2565, NA, 4235, …
glimpse(Canada_senior_pop_2021_wide)## Rows: 13
## Columns: 3
## $ Province <chr> "Newfoundland and Labrador", "Prince Edward Island", "Nova Sc…
## $ `2016` <dbl> 19.4, 19.4, 19.9, 19.9, 18.3, 16.7, 15.6, 15.5, 12.3, 18.3, 1…
## $ `2021` <dbl> 23.6, 21.2, 22.2, 22.8, 20.6, 18.5, 17.1, 17.5, 14.8, 20.3, 1…
str(Canada_senior_pop_2021_wide)## tibble [13 × 3] (S3: tbl_df/tbl/data.frame)
## $ Province: chr [1:13] "Newfoundland and Labrador" "Prince Edward Island" "Nova Scotia" "New Brunswick" ...
## $ 2016 : num [1:13] 19.4 19.4 19.9 19.9 18.3 16.7 15.6 15.5 12.3 18.3 ...
## $ 2021 : num [1:13] 23.6 21.2 22.2 22.8 20.6 18.5 17.1 17.5 14.8 20.3 ...
# Order the age groups: factor() with levels from str_sort(unique(...), numeric = TRUE)
# Rebuild age_group on the wide table as a factor with numerically sorted
# levels.
Victoria_pop_age_gender_2021_wide <- Victoria_pop_age_gender_2021_wide %>%
  mutate(
    age_group = factor(
      age_group,
      levels = str_sort(unique(age_group), numeric = TRUE)
    )
  )
str(Victoria_pop_age_gender_2021_wide)## tibble [42 × 4] (S3: tbl_df/tbl/data.frame)
## $ age_group: Factor w/ 21 levels "0 to 4","5 to 9",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ pop_comma: chr [1:42] "1,400" "1,510" "1,440" "1,395" ...
## $ Women : int [1:42] 1400 NA 1440 NA 1365 NA 1340 NA 3475 NA ...
## $ Men : int [1:42] NA 1510 NA 1395 NA 1340 NA 1375 NA 2565 ...
# Store each row's row name, converted to a number, in an `index` column so
# the table can be ordered by it.
Victoria_pop_age_gender_2021_wide$index <- as.numeric(
row.names(Victoria_pop_age_gender_2021_wide))
Victoria_pop_age_gender_2021_wide[order(
Victoria_pop_age_gender_2021_wide$index), ]## # A tibble: 42 × 5
## age_group pop_comma Women Men index
## <fct> <chr> <int> <int> <dbl>
## 1 0 to 4 1,400 1400 NA 1
## 2 0 to 4 1,510 NA 1510 2
## 3 5 to 9 1,440 1440 NA 3
## 4 5 to 9 1,395 NA 1395 4
## 5 10 to 14 1,365 1365 NA 5
## 6 10 to 14 1,340 NA 1340 6
## 7 15 to 19 1,340 1340 NA 7
## 8 15 to 19 1,375 NA 1375 8
## 9 20 to 24 3,475 3475 NA 9
## 10 20 to 24 2,565 NA 2565 10
## # … with 32 more rows
# Store each row's row name, converted to a number, in an `index` column.
# The provinces chart further down orders Province by this column
# (fct_reorder(Province, index)).
Canada_senior_pop_2021$index <- as.numeric(
row.names(Canada_senior_pop_2021))
Canada_senior_pop_2021[order(Canada_senior_pop_2021$index), ]## Province Year PercentageOfPop index
## 1 Newfoundland and Labrador 2016 19.4 1
## 2 Newfoundland and Labrador 2021 23.6 2
## 3 Prince Edward Island 2016 19.4 3
## 4 Prince Edward Island 2021 21.2 4
## 5 Nova Scotia 2016 19.9 5
## 6 Nova Scotia 2021 22.2 6
## 7 New Brunswick 2016 19.9 7
## 8 New Brunswick 2021 22.8 8
## 9 Quebec 2016 18.3 9
## 10 Quebec 2021 20.6 10
## 11 Ontario 2016 16.7 11
## 12 Ontario 2021 18.5 12
## 13 Manitoba 2016 15.6 13
## 14 Manitoba 2021 17.1 14
## 15 Saskatchewan 2016 15.5 15
## 16 Saskatchewan 2021 17.5 16
## 17 Alberta 2016 12.3 17
## 18 Alberta 2021 14.8 18
## 19 British Columbia 2016 18.3 19
## 20 British Columbia 2021 20.3 20
## 21 Yukon 2016 11.9 21
## 22 Yukon 2021 15.0 22
## 23 Northwest Territories 2016 7.7 23
## 24 Northwest Territories 2021 10.0 24
## 25 Nunavut 2016 3.8 25
## 26 Nunavut 2021 4.4 26
plot(Victoria_pop_age_gender_2021)
# PROBLEM: age grouping not correct, e.g. 100+ comes after 10 to 14.
# FIX: sort the values of age_group so that 100+ appears in the correct
# position, using factor() with levels from str_sort(unique(...), numeric = TRUE).
# Colour brewer color-blind safe
# (alternative palette: c("#d8b365", "#5ab4ac"))
colour_palette <- c("#af8dc3", "#7fbf7b")

# Theme shared by the census bar charts: centred bold title, centred
# subtitle, x-axis labels rotated 45 degrees, no legend title.
census_theme <- theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )

# Victoria: population per age group, bars filled by gender
# (no position given, so geom_bar stacks the genders).
Vic_1 <- Victoria_pop_age_gender_2021 %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars
  geom_bar(stat = "identity", alpha = 0.95, width = 0.85) +
  census_theme +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
Vic_1

# BC: same chart, with a fixed 0-400,000 y range.
BC <- BC_pop_2021 %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars
  geom_bar(stat = "identity", alpha = 0.95, width = 0.85) +
  census_theme +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  # comma labels, to avoid scientific notation
  scale_y_continuous(limits = c(0, 400000), labels = scales::comma)
BC

# Labelled stacked bars: need to find the cumulative sum for each stack.
# https://r-graphics.org/recipe-bar-graph-labels#cb84-7

# Colour brewer color-blind safe
# (alternative palette: c("#d8b365", "#5ab4ac"))
colour_palette <- c("#af8dc3", "#7fbf7b")

# Theme shared by the census bar charts: centred bold title, centred
# subtitle, x-axis labels rotated 45 degrees, no legend title.
census_theme <- theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )

# get cumulative sum: label_y is the running total of pop within each age
# group, used as the y position of each label. This relies on the row order
# within an age group matching the order in which the bars are stacked.
# ungroup() so the stored table does not stay grouped for later code.
BC_pop_2021 <- BC_pop_2021 %>%
  group_by(age_group) %>%
  mutate(label_y = cumsum(pop)) %>%
  ungroup()

BC_lab <- BC_pop_2021 %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  geom_col() +
  geom_text(aes(y = label_y, label = pop_comma), vjust = 1.25, size = 2) +
  census_theme +
  theme(legend.position = "bottom") +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  scale_y_continuous(limits = c(0, 400000), labels = scales::comma)
BC_lab

# Same for Victoria: need to find the cumulative sum for each stack.
# https://r-graphics.org/recipe-bar-graph-labels#cb84-7
# get cumulative sum
Victoria_pop_age_gender_2021 <- Victoria_pop_age_gender_2021 %>%
  group_by(age_group) %>%
  mutate(label_y = cumsum(pop)) %>%
  ungroup()

Vic_lab <- Victoria_pop_age_gender_2021 %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  geom_col() +
  geom_text(aes(y = label_y, label = pop_comma), vjust = 1.25, size = 3) +
  census_theme +
  theme(legend.position = "bottom") +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
Vic_lab
# PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14 >>
# Sort values of age_group to ensure 100+ appears in correct position
# factor, str_sort unique
# Colour brewer color-blind safe
# (alternative palette: c("#d8b365", "#5ab4ac"))
colour_palette <- c("#af8dc3", "#7fbf7b")

# Theme shared by the census bar charts: centred bold title, centred
# subtitle, x-axis labels rotated 45 degrees, no legend title.
census_theme <- theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )

# Victoria: one bar per gender, side by side within each age group.
Vic_dodge <- Victoria_pop_age_gender_2021 %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = position_dodge(),
    alpha = 0.95, width = 0.85
  ) +
  census_theme +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
Vic_dodge

# BC: same side-by-side chart, with a fixed 0-200,000 y range and comma
# labels on the y axis.
BC_dodge <- BC_pop_2021 %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = position_dodge(),
    alpha = 0.95, width = 0.85
  ) +
  census_theme +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "BC Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  scale_y_continuous(limits = c(0, 200000), labels = scales::comma)
BC_dodge
# PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14 >>
# Sort values of age_group to ensure 100+ appears in correct position
# factor, str_sort unique
# Colour brewer color-blind safe
# (alternative palette: c("#d8b365", "#5ab4ac"))
colour_palette <- c("#af8dc3", "#7fbf7b")

# Theme shared by the census bar charts: centred bold title, centred
# subtitle, x-axis labels rotated 45 degrees, no legend title.
census_theme <- theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )

# Victoria seniors: one bar per gender, side by side within each age group.
Vic_seniors <- Victoria_pop_age_gender_2021_seniors %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = position_dodge(),
    alpha = 0.95, width = 0.85
  ) +
  census_theme +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Seniors Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
Vic_seniors
# PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14 >>
# Sort values of age_group to ensure 100+ appears in correct position
# factor, str_sort unique

# BC seniors: same chart on the BC subset.
BC_seniors <- BC_pop_2021_seniors %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = position_dodge(),
    alpha = 0.95, width = 0.85
  ) +
  census_theme +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "BC Seniors Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
BC_seniors

# Victoria seniors with a value label above every bar.
Vic_seniors_lab <- Victoria_pop_age_gender_2021_seniors %>%
  ggplot(aes(x = age_group, y = pop, fill = gender)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = "dodge",
    alpha = 0.95, width = 0.85
  ) +
  # Text has no width of its own, so the dodge width is set to the bar
  # width (0.85) to centre each label on its bar.
  # vjust negative shows above bar
  geom_text(
    aes(label = pop_comma), size = 3,
    position = position_dodge(width = 0.85), vjust = -0.25
  ) +
  census_theme +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Victoria, BC Seniors Population \n by Age & Gender",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
Vic_seniors_lab

# Same labelled chart with the legend moved below the plot.
Vic_seniors_lab_leg_bot <- Vic_seniors_lab +
  theme(legend.position = "bottom")
Vic_seniors_lab_leg_bot
# PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14
# Theme shared by the two single-gender charts (fill is a fixed colour, so
# there is no legend to style).
single_gender_theme <- theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Women only, from the Women column of the wide table.
Vic_women <- Victoria_pop_age_gender_2021_wide %>%
  ggplot(aes(x = age_group, y = Women)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = position_dodge(),
    alpha = 0.75, width = 0.75, fill = "#d8b365"
  ) +
  single_gender_theme +
  labs(
    title = "Victoria, BC Women Population by Age",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
Vic_women## Warning: Removed 21 rows containing missing values (geom_bar).
# PROBLEM: age grouping not correct e.g. 100+ comes after 10 to 14

# Men only, from the Men column of the wide table.
Vic_men <- Victoria_pop_age_gender_2021_wide %>%
  ggplot(aes(x = age_group, y = Men)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = position_dodge(),
    alpha = 0.75, width = 0.75, fill = "#d8b365"
  ) +
  single_gender_theme +
  labs(
    title = "Victoria, BC Men's Population by Age",
    subtitle = "(2021 Canada Census)",
    x = "Age Group",
    y = "Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  )
Vic_men## Warning: Removed 21 rows containing missing values (geom_bar).
# Colour brewer color-blind safe
# (alternative palette: c("#d8b365", "#5ab4ac"))
colour_palette <- c("#af8dc3", "#7fbf7b")

# Seniors as a percentage of population per province, one bar per census
# year, drawn horizontally (coord_flip).
# Province is re-levelled by the index column:
# https://r-graph-gallery.com/267-reorder-a-variable-in-ggplot2.html
Canada_seniors <- Canada_senior_pop_2021 %>%
  mutate(Province = fct_reorder(Province, index)) %>%
  ggplot(aes(x = Province, y = PercentageOfPop, fill = Year)) +
  # width = 1 would leave no space between bars
  geom_bar(
    stat = "identity", position = position_dodge(),
    alpha = 0.95, width = 0.85
  ) +
  # Text has no width of its own, so the dodge width is set to the bar
  # width (0.85) to line each label up with its bar.
  geom_text(
    aes(label = PercentageOfPop), size = 3,
    position = position_dodge(width = 0.85),
    vjust = 0.5, hjust = 1.2
  ) +
  theme_light() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
    axis.text.x = element_text(angle = 360, hjust = 1),
    legend.title = element_blank()
  ) +
  scale_fill_manual(values = colour_palette) +
  labs(
    title = "Canadian Seniors as Percent of Population",
    subtitle = "(2016 & 2021 Canada Census)",
    x = NULL,
    y = "% of Population",
    caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021"
  ) +
  scale_y_continuous(limits = c(0, 25)) +
  coord_flip() +
  # https://www.geeksforgeeks.org/reversing-the-order-of-a-ggplot2-legend/
  guides(fill = guide_legend(reverse = TRUE))
Canada_seniors

### Save file
# The plot is passed explicitly instead of relying on "the last plot".
aspect_ratio <- 2.5
ggsave(
  "Canada_senior_pop_perc_2016_2021.jpg",
  plot = Canada_seniors,
  height = 10, width = 4 * aspect_ratio
)

# patchwork compositions: `+` places plots next to each other
BC + BC_dodge # compare stacked to side-by-side
BC + BC_lab
Vic_lab + Vic_1
Vic_dodge + BC_dodge
Vic_men + Vic_women## Warning: Removed 21 rows containing missing values (geom_bar).
## Removed 21 rows containing missing values (geom_bar).
# More patchwork compositions: `+` side by side, `/` stacked vertically.
BC_seniors + Vic_seniors
Vic_seniors + Vic_seniors_lab
Vic_seniors_lab + Vic_seniors_lab_leg_bot
Vic_seniors_lab / Vic_seniors_lab_leg_bot
Vic_1 + Vic_dodge # compare stacked to side-by-side
(BC + BC_dodge) / (Vic_1 + Vic_dodge)
BC + BC_dodge + Vic_1 + Vic_dodge +
  plot_layout(widths = c(2, 1))

#' Plot a dodged age-by-gender bar chart and save it to a file
#'
#' Cleans the gender labels, renames the first three columns, orders the age
#' groups numerically, builds the labelled bar chart and writes it with
#' `ggsave()`.
#'
#' @param data Data frame whose first three columns are age group, gender
#'   and population count, in that order. Further columns are left as they
#'   are.
#' @param colpat1,colpat2 Fill colours, passed in this order to
#'   `scale_fill_manual()`.
#' @param title,subtitle,caption Text passed to `labs()`.
#' @param labelsize Text size of the value labels.
#' @param filename File name passed to `ggsave()`.
#' @param aspect_ratio Width multiplier: the image is saved with height 10
#'   and width `4 * aspect_ratio`.
#' @return The value returned by `ggsave()` (printing it shows the file
#'   name).
census_age_gender_ggplot_save <- function(data, colpat1, colpat2,
                                          title, subtitle, caption, labelsize,
                                          filename, aspect_ratio = 2.5) {
  stopifnot(ncol(data) >= 3)
  # clean data
  data[data == "Women+"] <- "Women"
  data[data == "Men+"] <- "Men"
  # Rename only the first three columns so inputs that carry extra columns
  # (e.g. pop_comma, label_y) keep valid names for the rest.
  colnames(data)[1:3] <- c("age_group", "gender", "pop")
  # sort age values numerically so "100+" comes last
  data$age_group <- factor(
    data$age_group,
    levels = str_sort(unique(data$age_group), numeric = TRUE)
  )
  # Colour brewer color-blind safe
  colour_palette <- c(colpat1, colpat2)
  # ggplot
  age_gender_plot <- ggplot(
    data, aes(x = age_group, y = pop, fill = gender)
  ) +
    # width = 1 would leave no space between bars
    geom_bar(
      stat = "identity", position = "dodge",
      alpha = 0.95, width = 0.85
    ) +
    # Labels get thousands separators here, so no pop_comma column is
    # required. Text has no width of its own, so the dodge width is set to
    # the bar width (0.85) to line each label up with its bar. Labels are
    # rotated 90 degrees.
    geom_text(
      aes(label = formatC(pop, format = "d", big.mark = ",")),
      size = labelsize,
      position = position_dodge(width = 0.85),
      vjust = 0.35, hjust = 1.05, angle = 90
    ) +
    theme_light() +
    theme(
      plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "bottom",
      legend.title = element_blank()
    ) +
    scale_fill_manual(values = colour_palette) +
    labs(
      title = title,
      subtitle = subtitle,
      x = "Age Group",
      y = "Population",
      caption = caption
    )
  # Pass the plot explicitly instead of relying on "the last plot".
  ggsave(
    filename,
    plot = age_gender_plot,
    height = 10, width = 4 * aspect_ratio
  )
}
# The function formats the labels with thousands separators itself, so the
# pop_comma column is not needed in the calls below.
p1 <- census_age_gender_ggplot_save(
data = Victoria_pop_age_gender_2021_seniors,
colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
title = "Victoria Seniors Population \n by Age & Gender",
subtitle = "(2021 Canada Census)",
caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
labelsize = 3,
filename = "Victoria_pop_age_gender_2021_seniors.jpg"
)
p1## [1] "Victoria_pop_age_gender_2021_seniors.jpg"
# this changes values of y axis,
# BC_pop_2021$pop <- formatC(BC_pop_2021$pop, format = "d", big.mark = ",")
p2 <- census_age_gender_ggplot_save(data = BC_pop_2021,
colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
title = "BC Population \n by Age & Gender",
subtitle = "(2021 Canada Census)",
caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
labelsize = 3,
filename = "BC_pop_2021.jpg"
)
p2## [1] "BC_pop_2021.jpg"
p3 <- census_age_gender_ggplot_save(data = BC_pop_2021_seniors,
colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
title = "BC Seniors Population \n by Age & Gender",
subtitle = "(2021 Canada Census)",
caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
labelsize = 3,
filename = "BC_pop_2021_seniors.jpg"
)
p3## [1] "BC_pop_2021_seniors.jpg"
#' Build a dodged age-by-gender bar chart
#'
#' Cleans the gender labels, renames the first three columns, orders the age
#' groups numerically and returns the labelled bar chart.
#'
#' @param data Data frame whose first three columns are age group, gender
#'   and population count, in that order. Further columns are left as they
#'   are.
#' @param colpat1,colpat2 Fill colours, passed in this order to
#'   `scale_fill_manual()`.
#' @param title,subtitle,caption Text passed to `labs()`.
#' @param labelsize Text size of the value labels.
#' @return A ggplot object.
census_age_gender_ggplot <- function(
    data, colpat1, colpat2, title, subtitle, caption, labelsize) {
  stopifnot(ncol(data) >= 3)
  # clean data
  data[data == "Women+"] <- "Women"
  data[data == "Men+"] <- "Men"
  # Rename only the first three columns so inputs that carry extra columns
  # (e.g. pop_comma, label_y) keep valid names for the rest.
  colnames(data)[1:3] <- c("age_group", "gender", "pop")
  # sort age values numerically so "100+" comes last
  data$age_group <- factor(
    data$age_group,
    levels = str_sort(unique(data$age_group), numeric = TRUE)
  )
  # Colour brewer color-blind safe
  colour_palette <- c(colpat1, colpat2)
  # The chain below is the function's last expression, so the plot is
  # returned visibly.
  ggplot(data, aes(x = age_group, y = pop, fill = gender)) +
    # width = 1 would leave no space between bars
    geom_bar(
      stat = "identity", position = "dodge",
      alpha = 0.95, width = 0.85
    ) +
    # Labels get thousands separators here, so no pop_comma column is
    # required. Text has no width of its own, so the dodge width is set to
    # the bar width (0.85) to line each label up with its bar. Labels are
    # rotated 90 degrees.
    geom_text(
      aes(label = formatC(pop, format = "d", big.mark = ",")),
      size = labelsize,
      position = position_dodge(width = 0.85),
      vjust = 0.35, hjust = 1.05, angle = 90
    ) +
    theme_light() +
    theme(
      plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = rel(.92), hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "bottom",
      legend.title = element_blank()
    ) +
    scale_fill_manual(values = colour_palette) +
    labs(
      title = title,
      subtitle = subtitle,
      x = "Age Group",
      y = "Population",
      caption = caption
    )
}

#' Save a plot with height 10 and width `4 * aspect_ratio`
#'
#' @param filename File name passed to `ggsave()`.
#' @param aspect_ratio Width multiplier (default 2.5).
#' @param plot Plot to save. Defaults to `ggplot2::last_plot()`, which is
#'   what `ggsave()` uses when no plot is given.
#' @return The value returned by `ggsave()` (printing it shows the file
#'   name).
save_ggplot <- function(filename, aspect_ratio = 2.5,
                        plot = ggplot2::last_plot()) {
  ggsave(filename, plot = plot, height = 10, width = 4 * aspect_ratio)
}

p1_plot <- census_age_gender_ggplot(
  data = Victoria_pop_age_gender_2021_seniors,
  colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
  title = "Victoria Seniors Population \n by Age & Gender",
  subtitle = "(2021 Canada Census)",
  caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
  labelsize = 3
)
p1_plot
p1_save <- save_ggplot(
  filename = "Victoria_pop_age_gender_2021_seniors_1.jpg",
  plot = p1_plot
)
p1_save## [1] "Victoria_pop_age_gender_2021_seniors_1.jpg"
# this changes values of y axis,
# BC_pop_2021$pop <- formatC(BC_pop_2021$pop, format = "d", big.mark = ",")
p2_plot <- census_age_gender_ggplot(
  data = BC_pop_2021,
  colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
  title = "BC Population \n by Age & Gender",
  subtitle = "(2021 Canada Census)",
  caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
  labelsize = 3
)
p2_plot
p2_save <- save_ggplot(filename = "BC_pop_2021_1.jpg", plot = p2_plot)
p2_save## [1] "BC_pop_2021_1.jpg"
p3_plot <- census_age_gender_ggplot(
  data = BC_pop_2021_seniors,
  colpat1 = "#af8dc3", colpat2 = "#7fbf7b",
  title = "BC Seniors Population \n by Age & Gender",
  subtitle = "(2021 Canada Census)",
  caption = "Data: https://www12.statcan.gc.ca/census-recensement/2021",
  labelsize = 3
)
p3_plot
p3_save <- save_ggplot(
  filename = "BC_pop_2021_seniors_1.jpg",
  plot = p3_plot
)
p3_save## [1] "BC_pop_2021_seniors_1.jpg"
# to document specific packages used to run script
sessionInfo()## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9
## [5] purrr_0.3.4 readr_1.4.0 tidyr_1.2.0 tibble_3.1.7
## [9] ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.7 lubridate_1.7.10 assertthat_0.2.1 digest_0.6.27
## [5] utf8_1.2.1 R6_2.5.0 cellranger_1.1.0 backports_1.1.10
## [9] reprex_2.0.0 evaluate_0.15 httr_1.4.2 highr_0.9
## [13] pillar_1.7.0 rlang_1.0.2 readxl_1.3.1 rstudioapi_0.13
## [17] jquerylib_0.1.4 rmarkdown_2.15 textshaping_0.2.1 labeling_0.4.2
## [21] munsell_0.5.0 broom_0.8.0 compiler_4.0.2 modelr_0.1.8
## [25] xfun_0.30 pkgconfig_2.0.3 systemfonts_1.0.1 htmltools_0.5.2
## [29] tidyselect_1.1.2 fansi_0.5.0 crayon_1.4.1 dbplyr_2.1.1
## [33] withr_2.4.2 grid_4.0.2 jsonlite_1.7.2 gtable_0.3.0
## [37] lifecycle_1.0.1 DBI_1.1.1 magrittr_2.0.3 scales_1.2.0
## [41] cli_3.3.0 stringi_1.6.2 farver_2.1.0 fs_1.5.2
## [45] xml2_1.3.2 bslib_0.3.1 ellipsis_0.3.2 ragg_0.4.0
## [49] generics_0.1.0 vctrs_0.4.1 tools_4.0.2 glue_1.6.2
## [53] hms_1.0.0 fastmap_1.1.0 yaml_2.2.1 colorspace_2.0-1
## [57] rvest_1.0.0 knitr_1.39 haven_2.5.0 sass_0.4.0.9000